Cadences#
Show imports
import os
from collections import defaultdict, Counter
from git import Repo
import dimcat as dc
import ms3
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from utils import STD_LAYOUT, CADENCE_COLORS, color_background, value_count_df, get_repo_name, print_heading, resolve_dir
Show source
# Default corpus location: two directory levels above the current working directory.
CORPUS_PATH = os.path.abspath(os.path.join('..', '..'))
print_heading("Notebook settings")
# Note: the path is printed *before* it is passed through resolve_dir() below.
print(f"CORPUS_PATH: {CORPUS_PATH!r}")
# resolve_dir is a helper from the local `utils` module; presumably it
# normalizes/validates the directory -- TODO confirm against utils.
CORPUS_PATH = resolve_dir(CORPUS_PATH)
Notebook settings
-----------------
CORPUS_PATH: '/home/runner/work/workflow_deployment/distant_listening_corpus'
Show source
# Open the corpus directory as a git repository so the analysed data revision can be reported.
repo = Repo(CORPUS_PATH)
print_heading("Data and software versions")
# Only the first 7 characters of the current commit hash are shown.
print(f"Data repo '{get_repo_name(repo)}' @ {repo.commit().hexsha[:7]}")
print(f"dimcat version {dc.__version__}")
print(f"ms3 version {ms3.__version__}")
Data and software versions
--------------------------
Data repo 'distant_listening_corpus' @ c964ccc
dimcat version 0.3.0
ms3 version 2.2.2
# Create an empty dimcat Dataset and load the corpus directory into it.
# NOTE(review): parse_tsv=False presumably defers parsing of the TSV files -- confirm in the dimcat docs.
dataset = dc.Dataset()
dataset.load(directory=CORPUS_PATH, parse_tsv=False)
[annotated|all|default]
All corpora
-----------
View: This view is called 'annotated'. It
- excludes pieces that are not contained in the metadata,
- filters out file extensions requiring conversion (such as .xml),
- excludes review files and folders, and
- includes only facets containing 'expanded'.
has active expanded
metadata view detected parsed
corpus
ABC yes annotated 70 70
bach_en_fr_suites yes annotated 89 89
bach_solo yes annotated 68 68
bartok_bagatelles yes annotated 14 14
beethoven_piano_sonatas yes annotated 64 64
c_schumann_lieder yes annotated 12 12
chopin_mazurkas yes annotated 55 55
corelli yes annotated 149 149
couperin_clavecin yes annotated 9 9
couperin_concerts yes annotated 84 84
debussy_suite_bergamasque yes annotated 4 4
dvorak_silhouettes yes annotated 12 12
frescobaldi_fiori_musicali yes annotated 48 48
grieg_lyric_pieces yes annotated 66 66
handel_keyboard yes annotated 6 6
jc_bach_sonatas yes annotated 29 29
kleine_geistliche_konzerte yes annotated 55 55
kozeluh_sonatas yes annotated 48 48
liszt_pelerinage yes annotated 19 19
mahler_kindertotenlieder yes annotated 5 5
medtner_tales yes annotated 19 19
mendelssohn_quartets yes annotated 24 24
monteverdi_madrigals yes annotated 27 27
mozart_piano_sonatas yes annotated 54 54
pergolesi_stabat_mater yes annotated 7 7
peri_euridice yes annotated 6 6
pleyel_quartets yes annotated 6 6
poulenc_mouvements_perpetuels yes annotated 3 3
rachmaninoff_piano yes annotated 19 19
ravel_piano yes annotated 3 3
scarlatti_sonatas yes annotated 69 69
schubert_winterreise yes annotated 24 24
schulhoff_suite_dansante_en_jazz yes annotated 6 6
schumann_kinderszenen yes annotated 13 13
schumann_liederkreis yes annotated 12 12
sweelinck_keyboard yes annotated 1 1
tchaikovsky_seasons yes annotated 12 12
wagner_overtures yes annotated 2 2
wf_bach_sonatas yes annotated 9 9
boccherini_ensemble yes annotated 0 0
cpe_bach_keyboard yes annotated 0 0
debussy_childrens_corner yes annotated 0 0
debussy_deux_arabesques yes annotated 0 0
debussy_estampes yes annotated 0 0
debussy_etudes yes annotated 0 0
debussy_images yes annotated 0 0
debussy_other_piano_pieces yes annotated 0 0
debussy_pour_le_piano yes annotated 0 0
debussy_preludes yes annotated 0 0
platti_sonatas yes annotated 0 0
11/12 facets are excluded from this view.
There are 1 orphans that could not be attributed to any of the respective corpus's pieces.
N = 1222 annotated pieces, 1222 parsed dataframes.
Metadata#
# Metadata table for all loaded pieces; the analysis is aborted if it is empty.
all_metadata = dataset.data.metadata()
assert len(all_metadata) > 0, "No pieces selected for analysis."
print(f"Concatenated 'metadata.tsv' files cover {len(all_metadata)} of the {dataset.data.count_pieces()} scores.")
# Preview: move index level 1 into a column, take the first row of every
# level-0 group, and show only the first 20 columns.
all_metadata.reset_index(level=1).groupby(level=0).nth(0).iloc[:,:20]
Concatenated 'metadata.tsv' files cover 1222 of the 1222 scores.
| piece | TimeSig | KeySig | last_mc | last_mn | length_qb | last_mc_unfolded | last_mn_unfolded | length_qb_unfolded | volta_mcs | all_notes_qb | n_onsets | n_onset_positions | guitar_chord_count | form_label_count | label_count | annotated_key | harmony_version | annotators | reviewers | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | ||||||||||||||||||||
| ABC | n01op18-1_01 | 1: 3/4 | 1: -1 | 313.0 | 313.0 | 939.00 | 427.0 | 427.0 | 1281.00 | 3132.75 | 4589.0 | 1950.0 | 0.0 | 0.0 | 405.0 | F | 1.0.0 | Markus Neuwirth | NaN | |
| bach_en_fr_suites | BWV806_01_Prelude | 1: 12/8 | 1: 3 | 37.0 | 37.0 | 222.00 | 74.0 | 74.0 | 444.00 | 673.25 | 774.0 | 462.0 | 0.0 | 0.0 | 191.0 | A | 2.3.0 | Adrian Nagel (2.1.0), Davor Krkljus (2.3.0) | EMF, JH, DK | |
| bach_solo | BWV1001_01_Adagio | 1: 4/4 | 1: -1 | 22.0 | 22.0 | 88.00 | 22.0 | 22.0 | 88.00 | 157.50 | 526.0 | 420.0 | 0.0 | 0.0 | 95.0 | g | 2.3.0 | Adrian Nagel | NaN | |
| bartok_bagatelles | op06n01 | 1: 4/4 | 1: 4 | 18.0 | 18.0 | 72.00 | 18.0 | 18.0 | 72.00 | NaN | 121.50 | 135.0 | 109.0 | 0.0 | 0.0 | 25.0 | c# | 2.3.0 | Amelia Brey | JH |
| beethoven_piano_sonatas | 01-1 | 1: 2/2 | 1: -4 | 154.0 | 152.0 | 608.00 | 308.0 | 304.0 | 1216.00 | 1476.00 | 1679.0 | 985.0 | 0.0 | 0.0 | 241.0 | f | 2.3.0 | Lars & Ya-Chuan (2.2.0), John Heilig (2.3.0) | AN | |
| c_schumann_lieder | op13no1 Ich stand in dunklen Traumen | 1: 3/4 | 1: -3 | 37.0 | 37.0 | 111.00 | 37.0 | 37.0 | 111.00 | 552.50 | 931.0 | 225.0 | 0.0 | 0.0 | 103.0 | Eb | 2.3.0 | Adrian Nagel | AB | |
| chopin_mazurkas | BI105-2op30-2 | 1: 3/4 | 1: 2 | 65.0 | 64.0 | 193.00 | 65.0 | 64.0 | 193.00 | 711.00 | 810.0 | 274.0 | 0.0 | 0.0 | 116.0 | b | 2.3.0 | Wendelin Bitzan (1.0.0), Adrian Nagel (2.2.0),... | JH, AN, DK | |
| corelli | op01n01a | 1: 4/4 | 1: -1 | 14.0 | 14.0 | 56.00 | 14.0 | 14.0 | 56.00 | 224.00 | 280.0 | 110.0 | 0.0 | 0.0 | 64.0 | F | 2.3.0 | Lars Opfermann, Ya-Chuan Wu (2.1.1), Hanné Bec... | HB, JH | |
| couperin_clavecin | 00_allemande | 1: 4/4 | 1: -1 | 15.0 | 13.0 | 52.00 | 30.0 | 26.0 | 104.00 | 101.75 | 322.0 | 210.0 | 0.0 | 0.0 | 66.0 | d | 2.3.0 | Adrian Nagel (2.1.0), Davor Krkljus (2.3.0) | DK, Hanné Becker | |
| couperin_concerts | c01n01_prelude | 1: 4/4 | 1: 1 | 25.0 | 23.0 | 98.00 | 25.0 | 23.0 | 98.00 | 219.00 | 386.0 | 251.0 | 0.0 | 0.0 | 93.0 | G | 2.1.0 | Eva-Maria Hamberger | Johannes Menke | |
| debussy_suite_bergamasque | l075-01_suite_prelude | 1: 4/4 | 1: -1 | 89.0 | 89.0 | 356.00 | 89.0 | 89.0 | 356.00 | 1533.67 | 1721.0 | 870.0 | 0.0 | 0.0 | 274.0 | F | 2.3.0 | Adrian Nagel (2.1.1), Amelia Brey (2.3.0) | AB, AN | |
| dvorak_silhouettes | op08n01 | 1: 6/8 | 1: 4, 7: -5, 49: 4 | 54.0 | 52.0 | 156.50 | 54.0 | 52.0 | 156.50 | 658.75 | 957.0 | 288.0 | 0.0 | 0.0 | 80.0 | c# | 2.3.0 | Daniel Grote (2.1.1), Hanné Becker (2.3.0) | Johannes Hentschel (2.1.1), AN | |
| frescobaldi_fiori_musicali | 12.01_Toccata_avanti_la_Messa_della_Domenica | 1: 4/2 | 1: 0 | 8.0 | 8.0 | 64.00 | 8.0 | 8.0 | 64.00 | NaN | 244.00 | 200.0 | 121.0 | 0.0 | 0.0 | 57.0 | d | NaN | NaN | NaN |
| grieg_lyric_pieces | op12n01 | 1: 2/4 | 1: -3 | 23.0 | 23.0 | 46.00 | 23.0 | 23.0 | 46.00 | 135.50 | 268.0 | 156.0 | 0.0 | 0.0 | 43.0 | Eb | 2.3.0 | Adrian Nagel (2.1.1), John Heilig (2.30) | Adrian Nagel | |
| handel_keyboard | hwv430d_Grobschmied_Aria | 1: 4/4 | 1: 4 | 10.0 | 9.0 | 33.00 | 15.0 | 13.0 | 49.00 | 118.00 | 213.0 | 85.0 | 0.0 | 0.0 | 51.0 | E | 2.3.0 | Adrian Nagel (2.1.0), Davor Krkljus (2.3.0) | DK | |
| jc_bach_sonatas | wa01op05no1a_Allegretto | 1: 2/4 | 1: -2 | 84.0 | 82.0 | 166.00 | 168.0 | 164.0 | 332.00 | 313.00 | 896.0 | 638.0 | 0.0 | 0.0 | 120.0 | Bb | 2.3.0 | Adrian Nagel (2.1.1.), Ehsan Mohagheghi Fard (... | AN | |
| kleine_geistliche_konzerte | op08n01swv282_Eile_mich,_Gott,_zu_erretten | 1: 4/4 | 1: 0 | 68.0 | 68.0 | 272.00 | 68.0 | 68.0 | 272.00 | NaN | 507.50 | 315.0 | 251.0 | 0.0 | 0.0 | 84.0 | d | 2.1.1 | Adrian Nagel | NaN |
| kozeluh_sonatas | 09op08no1a | 1: 6/8 | 1: 0 | 135.0 | 135.0 | 405.00 | 194.0 | 194.0 | 582.00 | NaN | 960.00 | 2047.0 | 1351.0 | 0.0 | 0.0 | 272.0 | C | 2.1.0 | Adrian Nagel | NaN |
| liszt_pelerinage | 160.01_Chapelle_de_Guillaume_Tell | 1: 4/4 | 1: 0 | 97.0 | 97.0 | 388.00 | 97.0 | 97.0 | 388.00 | 1902.42 | 2879.0 | 1069.0 | 0.0 | 0.0 | 174.0 | C | 2.3.0 | Adrian Nagel (2.1.1), Amelia Brey (2.3.0) | Johannes Hentschel (1-33 & 82-97), AB, AN | |
| mahler_kindertotenlieder | kindertotenlieder_01_nun_will_die_sonn | 1: 4/4 | 1: -1 | 85.0 | 84.0 | 337.00 | 85.0 | 84.0 | 337.00 | 1064.50 | 989.0 | 532.0 | 0.0 | 0.0 | 179.0 | d | 2.3.0 | Amelia Brey | DK | |
| medtner_tales | op08n01 | 1: 4/8 | 1: -3 | 81.0 | 81.0 | 162.00 | 81.0 | 81.0 | 162.00 | 603.00 | 1481.0 | 528.0 | 0.0 | 0.0 | 213.0 | c | 2.3.0 | Wendelin Bitzan (2.2.0), John Heilig (2.3.0) | Adrian Nagel, DK | |
| mendelssohn_quartets | 01op12a | 1: 4/4 | 1: -3 | 294.0 | 292.0 | 1168.00 | 294.0 | 292.0 | 1168.00 | 4329.00 | 3638.0 | 1702.0 | 0.0 | 0.0 | 673.0 | Eb | 2.1.0 | Adrian Nagel | NaN | |
| monteverdi_madrigals | 2-12 | 1: 4/4 | 1: -1 | 93.0 | 93.0 | 372.00 | 93.0 | 93.0 | 372.00 | NaN | 1374.00 | 1011.0 | 454.0 | 0.0 | 0.0 | 225.0 | F | 2.1.0 | Adrian Nagel | NaN |
| mozart_piano_sonatas | K279-1 | 1: 4/4 | 1: 0 | 100.0 | 100.0 | 400.00 | 200.0 | 200.0 | 800.00 | 767.00 | 2031.0 | 1441.0 | 0.0 | 0.0 | 251.0 | C | NaN | Uli Kneisel | Johannes Hentschel, Markus Neuwirth | |
| pergolesi_stabat_mater | 01. Stabat Mater dolorosa | 1: 4/4 | 1: -4 | 47.0 | 47.0 | 188.00 | 47.0 | 47.0 | 188.00 | NaN | 882.50 | 1068.0 | 368.0 | 7.0 | 0.0 | 166.0 | f | 2.2.0 | Uli Kneisel | NaN |
| peri_euridice | peri_euridice_scene_0 | 1: 4/2 | 1: -1 | 15.0 | 14.0 | 120.00 | 15.0 | 14.0 | 120.00 | NaN | 220.50 | 103.0 | 70.0 | 0.0 | 0.0 | 32.0 | F | 2.3.0 | Davor Krkljus | ST |
| pleyel_quartets | b307op2n1a | 1: 4/4 | 1: 3 | 199.0 | 197.0 | 793.00 | 284.0 | 283.0 | 1133.00 | [[[87], [88]]] | 2694.00 | 3643.0 | 1604.0 | 0.0 | 0.0 | 403.0 | A | 2.3.0 | Adrian Nagel (2.1.0), Davor Krkljus (2.3.0) | DK, AN |
| poulenc_mouvements_perpetuels | 01_assez_modere | 1: 4/4 | 1: 0 | 24.0 | 24.0 | 96.00 | 43.0 | 43.0 | 172.00 | 246.00 | 368.0 | 191.0 | 0.0 | 0.0 | 93.0 | Bb | 2.3.0 | Amelia Brey | DK | |
| rachmaninoff_piano | op42_01a | 1: 3/4 | 1: -1 | 16.0 | 16.0 | 48.00 | 16.0 | 16.0 | 48.00 | NaN | 192.00 | 125.0 | 52.0 | 0.0 | 0.0 | 28.0 | d | 2.3.0 | Amelia Brey | DK |
| ravel_piano | Ravel_-_Jeux_dEau | 1: 4/4, 2: 2/4, 3: 4/4, 8: 2/4, 9: 4/4, 12: 1/... | 1: 4 | 88.0 | 85.0 | 333.25 | 88.0 | 85.0 | 333.25 | NaN | 1143.40 | 4362.0 | 2599.0 | 0.0 | 0.0 | 257.0 | E | 2.1.0 | Adrian Nagel | NaN |
| scarlatti_sonatas | K001 | 1: 4/4 | 1: -1 | 31.0 | 31.0 | 124.00 | 62.0 | 62.0 | 248.00 | 264.50 | 705.0 | 450.0 | 0.0 | 0.0 | 89.0 | d | 2.3.0 | unknown (0.0.0), Davor Krkljus (2.3.0) | DK, JH | |
| schubert_winterreise | n01 | 1: 2/4 | 1: -1, 71: 2, 99: -1 | 105.0 | 105.0 | 210.00 | 137.0 | 137.0 | 274.00 | NaN | 1088.75 | 2174.0 | 505.0 | 0.0 | 0.0 | 215.0 | d | 2.1.0 | Alexander Faschon | Johannes Hentschel |
| schulhoff_suite_dansante_en_jazz | suite_dansante_en_jazz_1_stomp | 1: 2/2 | 1: 0 | 46.0 | 46.0 | 184.00 | 46.0 | 46.0 | 184.00 | 505.83 | 706.0 | 317.0 | 0.0 | 0.0 | 96.0 | E | 2.3.0 | Amelia Brey | DK | |
| schumann_kinderszenen | n01 | 1: 2/4 | 1: 1 | 22.0 | 22.0 | 44.00 | 44.0 | 44.0 | 88.00 | 134.33 | 241.0 | 141.0 | 0.0 | 0.0 | 44.0 | G | 2.3.0 | Tal Soker (2.1.1), John Heilig (2.3.0) | AN, JHei, JH | |
| schumann_liederkreis | op39n01 | 1: 4/4 | 1: 3 | 28.0 | 28.0 | 112.00 | 28.0 | 28.0 | 112.00 | NaN | 301.25 | 663.0 | 433.0 | 0.0 | 0.0 | 47.0 | f# | 2.1.0 | Uli Kneisel | Adrian Nagel |
| sweelinck_keyboard | SwWV258_fantasia_cromatica | 1: 4/4 | 1: -1 | 196.0 | 196.0 | 784.00 | 196.0 | 196.0 | 784.00 | 2502.50 | 2639.0 | 1595.0 | 0.0 | 0.0 | 501.0 | d | 2.1.0 | Adrian Nagel | NaN | |
| tchaikovsky_seasons | op37a01 | 1: 3/4 | 1: 3, 29: 1, 63: 3 | 103.0 | 103.0 | 309.00 | 103.0 | 103.0 | 309.00 | 1058.17 | 1537.0 | 829.0 | 0.0 | 0.0 | 313.0 | A | 2.3.0 | Adrian Nagel (2.1.1), John Heilig (2.3.0) | Johannes Hentschel, AN | |
| wagner_overtures | WWV090_Tristan_01_Vorspiel-Prelude_Ricordi1888... | 1: 6/8 | 1: 0, 44: 3, 72: 0 | 112.0 | 111.0 | 333.50 | 112.0 | 111.0 | 333.50 | NaN | 1224.50 | 1676.0 | 896.0 | 0.0 | 0.0 | 360.0 | a | 2.1.0 | Adrian Nagel | NaN |
| wf_bach_sonatas | F001_n08a | 1: 4/4 | 1: 0 | 63.0 | 63.0 | 252.00 | 126.0 | 126.0 | 504.00 | 602.75 | 1186.0 | 727.0 | 0.0 | 0.0 | 205.0 | C | 2.3.0 | Christos Giannopoulos (1.0.0), Davor Krkljus (... | DK, AN |
All annotation labels from the selected pieces#
# Fetch the 'expanded' facet (one row per harmony label) for the whole dataset.
all_labels = dataset.data.get_facet('expanded')
print(f"{len(all_labels.index)} hand-annotated harmony labels:")
# Show the first 20 rows; color_background (from utils) styles the "chord" column.
all_labels.iloc[:20].style.apply(color_background, subset="chord")
232815 hand-annotated harmony labels:
| mc | mn | quarterbeats | quarterbeats_all_endings | duration_qb | mc_onset | mn_onset | timesig | staff | voice | label | globalkey | localkey | pedal | chord | special | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | alt_label | volta | pedalend | placement | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | piece | i | ||||||||||||||||||||||||||||||||||
| ABC | n01op18-1_01 | 0 | 1 | 1 | 0 | 0 | 3.000000 | 0 | 0 | 3/4 | 4 | 1 | F.I | F | I | nan | I | nan | I | nan | nan | nan | nan | nan | nan | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | |
| 1 | 2 | 2 | 3 | 3 | 3.000000 | 0 | 0 | 3/4 | 4 | 1 | V | F | I | nan | V | nan | V | nan | nan | nan | nan | nan | nan | M | False | False | (1, 5, 2) | () | 1 | 1 | nan | nan | nan | |||
| 2 | 3 | 3 | 6 | 6 | 3.000000 | 0 | 0 | 3/4 | 4 | 1 | I | F | I | nan | I | nan | I | nan | nan | nan | nan | nan | nan | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | |||
| 3 | 4 | 4 | 9 | 9 | 6.000000 | 0 | 0 | 3/4 | 4 | 1 | IV6 | F | I | nan | IV6 | nan | IV | nan | 6 | nan | nan | nan | nan | M | False | False | (3, 0, -1) | () | -1 | 3 | nan | nan | nan | |||
| 4 | 6 | 6 | 15 | 15 | 3.000000 | 0 | 0 | 3/4 | 4 | 1 | V65 | F | I | nan | V65 | nan | V | nan | 65 | nan | nan | nan | nan | Mm7 | False | False | (5, 2, -1, 1) | () | 1 | 5 | nan | nan | nan | |||
| 5 | 7 | 7 | 18 | 18 | 1.000000 | 0 | 0 | 3/4 | 4 | 1 | I | F | I | nan | I | nan | I | nan | nan | nan | nan | nan | nan | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | |||
| 6 | 7 | 7 | 19 | 19 | 1.000000 | 1/4 | 1/4 | 3/4 | 4 | 1 | vi | F | I | nan | vi | nan | vi | nan | nan | nan | nan | nan | nan | m | False | False | (3, 0, 4) | () | 3 | 3 | nan | nan | nan | |||
| 7 | 7 | 7 | 20 | 20 | 1.000000 | 1/2 | 1/2 | 3/4 | 4 | 1 | ii6 | F | I | nan | ii6 | nan | ii | nan | 6 | nan | nan | nan | nan | m | False | False | (-1, 3, 2) | () | 2 | -1 | nan | nan | nan | |||
| 8 | 8 | 8 | 21 | 21 | 2.000000 | 0 | 0 | 3/4 | 4 | 1 | V(64) | F | I | nan | V(64) | nan | V | nan | nan | 64 | nan | nan | nan | M | False | False | (1, 0, 4) | () | 1 | 1 | nan | nan | nan | |||
| 9 | 8 | 8 | 23 | 23 | 1.000000 | 1/2 | 1/2 | 3/4 | 4 | 1 | V\\ | F | I | nan | V | nan | V | nan | nan | nan | nan | nan | \\ | M | False | False | (1, 5, 2) | () | 1 | 1 | nan | nan | nan | |||
| 10 | 9 | 9 | 24 | 24 | 3.000000 | 0 | 0 | 3/4 | 4 | 1 | I | F | I | nan | I | nan | I | nan | nan | nan | nan | nan | nan | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | |||
| 11 | 10 | 10 | 27 | 27 | 3.000000 | 0 | 0 | 3/4 | 4 | 1 | V | F | I | nan | V | nan | V | nan | nan | nan | nan | nan | nan | M | False | False | (1, 5, 2) | () | 1 | 1 | nan | nan | nan | |||
| 12 | 11 | 11 | 30 | 30 | 3.000000 | 0 | 0 | 3/4 | 4 | 1 | I | F | I | nan | I | nan | I | nan | nan | nan | nan | nan | nan | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | |||
| 13 | 12 | 12 | 33 | 33 | 6.000000 | 0 | 0 | 3/4 | 4 | 1 | IV6 | F | I | nan | IV6 | nan | IV | nan | 6 | nan | nan | nan | nan | M | False | False | (3, 0, -1) | () | -1 | 3 | nan | nan | nan | |||
| 14 | 14 | 14 | 39 | 39 | 2.000000 | 0 | 0 | 3/4 | 4 | 1 | #viio7(6)/vi | F | I | nan | #viio7(6)/vi | nan | #vii | o | 7 | 6 | vi | nan | nan | o7 | False | False | (8, 5, 4, -1) | () | 8 | 8 | nan | nan | nan | |||
| 15 | 14 | 14 | 41 | 41 | 4.000000 | 1/2 | 1/2 | 3/4 | 4 | 1 | #viio7/vi | F | I | nan | #viio7/vi | nan | #vii | o | 7 | nan | vi | nan | nan | o7 | False | False | (8, 5, 2, -1) | () | 8 | 8 | nan | nan | nan | |||
| 16 | 16 | 16 | 45 | 45 | 2.000000 | 0 | 0 | 3/4 | 4 | 1 | #viio7(4)/ii | F | I | nan | #viio7(4)/ii | nan | #vii | o | 7 | 4 | ii | nan | nan | o7 | False | False | (7, -1, 1, -2) | () | 7 | 7 | nan | nan | nan | |||
| 17 | 16 | 16 | 47 | 47 | 4.000000 | 1/2 | 1/2 | 3/4 | 4 | 1 | #viio7/ii | F | I | nan | #viio7/ii | nan | #vii | o | 7 | nan | ii | nan | nan | o7 | False | False | (7, 4, 1, -2) | () | 7 | 7 | nan | nan | nan | |||
| 18 | 18 | 18 | 51 | 51 | 1.500000 | 0 | 0 | 3/4 | 4 | 1 | ii6(11#7b6) | F | I | nan | ii6(11#7b6) | nan | ii | nan | 6 | 11#7b6 | nan | nan | nan | m | False | False | (-1, -2, 7) | (1,) | 2 | -1 | nan | nan | nan | |||
| 19 | 18 | 18 | 105/2 | 105/2 | 1.500000 | 3/8 | 3/8 | 3/4 | 4 | 1 | ii6 | F | I | nan | ii6 | nan | ii | nan | 6 | nan | nan | nan | nan | m | False | False | (-1, 3, 2) | () | 2 | -1 | nan | nan | nan |
Filtering out pieces without cadence annotations#
# Apply dimcat's HasCadenceAnnotationsFilter to the dataset; the result is used
# from here on as the cadence-annotated subset.
hascadence = dc.HasCadenceAnnotationsFilter().process_data(dataset)
# `indices` is keyed by group; the empty tuple () is the key looked up here for the
# (still ungrouped) dataset. Fail early if the filter left nothing.
assert () in hascadence.indices and len(hascadence.indices[()]) > 0, "No cadences found."
print(f"Before: {len(dataset.indices[()])} pieces; after removing those without cadence labels: {len(hascadence.indices[()])}")
Before: 1222 pieces; after removing those without cadence labels: 853
Show corpora containing pieces with cadence annotations#
# Group the cadence-annotated pieces with dimcat's CorpusGrouper.
grouped_by_corpus = dc.CorpusGrouper().process_data(hascadence)
# Map the first element of every group key to a human-readable piece count.
corpora = {
    group_key[0]: f"{len(piece_ids)} pieces"
    for group_key, piece_ids in grouped_by_corpus.indices.items()
}
print(f"{len(corpora)} corpora with {sum(map(len, grouped_by_corpus.indices.values()))} pieces containing cadence annotations:")
# Last expression of the cell: displayed as the cell output.
corpora
27 corpora with 853 pieces containing cadence annotations:
{'bach_en_fr_suites': '89 pieces',
'bach_solo': '32 pieces',
'bartok_bagatelles': '12 pieces',
'beethoven_piano_sonatas': '64 pieces',
'c_schumann_lieder': '12 pieces',
'chopin_mazurkas': '50 pieces',
'corelli': '148 pieces',
'couperin_clavecin': '9 pieces',
'couperin_concerts': '84 pieces',
'debussy_suite_bergamasque': '4 pieces',
'dvorak_silhouettes': '12 pieces',
'grieg_lyric_pieces': '65 pieces',
'handel_keyboard': '6 pieces',
'jc_bach_sonatas': '29 pieces',
'liszt_pelerinage': '19 pieces',
'mahler_kindertotenlieder': '5 pieces',
'medtner_tales': '19 pieces',
'mozart_piano_sonatas': '54 pieces',
'peri_euridice': '6 pieces',
'pleyel_quartets': '6 pieces',
'poulenc_mouvements_perpetuels': '2 pieces',
'rachmaninoff_piano': '17 pieces',
'scarlatti_sonatas': '69 pieces',
'schulhoff_suite_dansante_en_jazz': '6 pieces',
'schumann_kinderszenen': '13 pieces',
'tchaikovsky_seasons': '12 pieces',
'wf_bach_sonatas': '9 pieces'}
All annotation labels from the pieces containing cadence annotations#
# Re-bind all_labels to the 'expanded' facet of the cadence-filtered dataset;
# from here on all_labels no longer covers the full dataset.
all_labels = hascadence.get_facet('expanded')
print(f"{len(all_labels.index)} hand-annotated harmony labels:")
# Preview the first 10 rows, skipping the first 13 columns.
all_labels.iloc[:10, 13:].style.apply(color_background, subset="chord")
138219 hand-annotated harmony labels:
| pedal | chord | numeral | form | figbass | changes | relativeroot | cadence | phraseend | chord_type | globalkey_is_minor | localkey_is_minor | chord_tones | added_tones | root | bass_note | volta | quarterbeats_all_endings | special | pedalend | placement | |||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| corpus | fname | interval | |||||||||||||||||||||
| bach_en_fr_suites | BWV806_01_Prelude | [0.0, 1.5) | nan | I | I | nan | nan | nan | nan | nan | { | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | nan | |
| [1.5, 3.0) | nan | V7 | V | nan | 7 | nan | nan | nan | nan | Mm7 | False | False | (1, 5, 2, -1) | () | 1 | 1 | nan | nan | nan | nan | |||
| [3.0, 4.5) | nan | I | I | nan | nan | nan | nan | nan | nan | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | nan | |||
| [4.5, 6.0) | nan | V7 | V | nan | 7 | nan | nan | nan | nan | Mm7 | False | False | (1, 5, 2, -1) | () | 1 | 1 | nan | nan | nan | nan | |||
| [6.0, 7.5) | nan | I | I | nan | nan | nan | nan | nan | nan | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | nan | |||
| [7.5, 9.0) | nan | V | V | nan | nan | nan | nan | nan | nan | M | False | False | (1, 5, 2) | () | 1 | 1 | nan | nan | nan | nan | |||
| [9.0, 10.5) | nan | I | I | nan | nan | nan | nan | nan | nan | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | nan | |||
| [10.5, 12.0) | nan | I6 | I | nan | 6 | nan | nan | nan | nan | M | False | False | (4, 1, 0) | () | 0 | 4 | nan | nan | nan | nan | |||
| [12.0, 17.5) | nan | I | I | nan | nan | nan | nan | nan | }{ | M | False | False | (0, 4, 1) | () | 0 | 0 | nan | nan | nan | nan | |||
| [17.5, 18.0) | nan | IV | IV | nan | nan | nan | nan | nan | nan | M | False | False | (-1, 3, 0) | () | -1 | -1 | nan | nan | nan | nan |
Metadata#
dataset_metadata = hascadence.data.metadata()
# Keep only the metadata rows of the pieces that passed the cadence filter.
hascadence_metadata = dataset_metadata.loc[hascadence.indices[()]]
# Rename the first index level to 'dataset'; later cells group and color by this name.
hascadence_metadata.index.rename('dataset', level=0, inplace=True)
hascadence_metadata.head()
| TimeSig | KeySig | last_mc | last_mn | length_qb | last_mc_unfolded | last_mn_unfolded | length_qb_unfolded | volta_mcs | all_notes_qb | ... | staff_18_ambitus | staff_18_instrument | staff_19_ambitus | staff_19_instrument | staff_20_ambitus | staff_20_instrument | staff_21_ambitus | staff_21_instrument | source.1 | part_name_text | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| dataset | piece | |||||||||||||||||||||
| bach_en_fr_suites | BWV806_01_Prelude | 1: 12/8 | 1: 3 | 37.0 | 37.0 | 222.0 | 74.0 | 74.0 | 444.0 | 673.25 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | |
| BWV806_02_Allemande | 1: 4/4 | 1: 3 | 34.0 | 32.0 | 128.0 | 68.0 | 64.0 | 256.0 | 498.50 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||
| BWV806_03_Courante_I | 1: 3/2 | 1: 3 | 22.0 | 20.0 | 120.0 | 44.0 | 40.0 | 240.0 | 381.00 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||
| BWV806_04_Courante_II | 1: 3/2 | 1: 3 | 26.0 | 24.0 | 144.0 | 52.0 | 48.0 | 288.0 | 434.50 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||
| BWV806_05_Double_I | 1: 3/2 | 1: 3 | 26.0 | 24.0 | 144.0 | 52.0 | 48.0 | 288.0 | 392.50 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 117 columns
# Mean of `composed_end` per first index level, truncated to int and sorted ascending.
# NOTE(review): astype(int) raises if a group's mean is NaN (no composed_end values) -- confirm this cannot occur.
mean_composition_years = hascadence_metadata.groupby(level=0).composed_end.mean().astype(int).sort_values()
# Group names in ascending order of mean year; reused by later cells for axis ordering.
chronological_order = mean_composition_years.index.to_list()
# One row per group: mean year and number of pieces.
bar_data = pd.concat([mean_composition_years.rename('year'),
hascadence_metadata.groupby(level='dataset').size().rename('pieces')],
axis=1
).reset_index()
fig = px.bar(bar_data, x='year', y='pieces', color='dataset', title='Pieces contained in the dataset')
# Fixed bar width of 5 (in x-axis units, i.e. years here).
fig.update_traces(width=5)
Overall#
PAC: Perfect Authentic Cadence
IAC: Imperfect Authentic Cadence
HC: Half Cadence
DC: Deceptive Cadence
EC: Evaded Cadence
PC: Plagal Cadence
# Number of rows carrying a (non-null) cadence label.
print(f"{all_labels.cadence.notna().sum()} cadence labels.")
# value_count_df is a helper from the local utils module; the rendered output
# shows counts and fractions per cadence label.
value_count_df(all_labels.cadence)
8702 cadence labels.
| counts | % | |
|---|---|---|
| cadence | ||
| PAC | 4226 | 0.485635 |
| HC | 2317 | 0.266261 |
| IAC | 1456 | 0.167318 |
| EC | 255 | 0.029304 |
| DC | 163 | 0.018731 |
| HC.SIM | 109 | 0.012526 |
| PC | 85 | 0.009768 |
| HC.CON | 39 | 0.004482 |
| HC.PHR | 37 | 0.004252 |
| HC.TEN | 15 | 0.001724 |
# Pie chart over the rows that carry a cadence label; slices are named and colored by label.
px.pie(all_labels[all_labels.cadence.notna()], names="cadence", color="cadence", color_discrete_map=CADENCE_COLORS)
Per dataset#
# Absolute number of each cadence label per corpus ...
cadence_count_per_dataset = all_labels.groupby("corpus").cadence.value_counts()
# ... and the same counts divided by the corpus total (fractions per corpus).
cadence_fraction_per_dataset = cadence_count_per_dataset / cadence_count_per_dataset.groupby(level=0).sum()
# Stacked bars per corpus. `category_orders` must be keyed by the plotted column
# name ('corpus'); the previous key 'dataset' matched no column, so the
# chronological ordering of the x axis was silently ignored.
px.bar(cadence_fraction_per_dataset.rename('count').reset_index(), x='corpus', y='count', color='cadence',
       color_discrete_map=CADENCE_COLORS, category_orders=dict(corpus=chronological_order))
# One pie per corpus (facets wrapped after 4 columns), sized by the absolute cadence counts.
fig = px.pie(cadence_count_per_dataset.rename('count').reset_index(), names='cadence', color='cadence', values='count',
facet_col='corpus', facet_col_wrap=4, height=2000, color_discrete_map=CADENCE_COLORS)
# Facet titles: keep only the text after the last "=" of each annotation.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(**STD_LAYOUT)
Per phrase#
Number of cadences per phrase#
# Slice the corpus-grouped data with dimcat's PhraseSlicer.
segmented = dc.PhraseSlicer().process_data(grouped_by_corpus)
phrases = segmented.get_slice_info()
phrase_segments = segmented.get_facet("expanded")

# Group on the first three index levels (presumably one group per phrase slice -- TODO confirm).
phrase_gpb = phrase_segments.groupby(level=[0, 1, 2])

# Distinct local keys per group (as tuples) and how many there are.
local_keys_per_phrase = phrase_gpb['localkey'].unique().map(tuple)
n_local_keys_per_phrase = local_keys_per_phrase.map(len)
phrases_with_keys = pd.concat(
    [
        n_local_keys_per_phrase.rename('n_local_keys'),
        local_keys_per_phrase.rename('local_keys'),
        phrases,
    ],
    axis=1,
)

# Number of distinct cadence labels per group and the labels themselves,
# with missing values removed from the tuples.
phrases_with_cadences = pd.concat(
    [
        phrase_gpb['cadence'].nunique().rename('n_cadences'),
        phrase_gpb['cadence'].unique().rename('cadences').map(
            lambda labels: tuple(label for label in labels if not pd.isnull(label))
        ),
        phrases_with_keys,
    ],
    axis=1,
)
value_count_df(phrases_with_cadences.n_cadences, counts="#phrases")
| #phrases | % | |
|---|---|---|
| n_cadences | ||
| 1 | 7892 | 0.808607 |
| 0 | 1575 | 0.161373 |
| 2 | 280 | 0.028689 |
| 3 | 13 | 0.001332 |
# Per corpus: how many phrases contain 0, 1, 2, ... distinct cadence labels.
n_cad = phrases_with_cadences.groupby(level='corpus').n_cadences.value_counts().rename('counts').reset_index().sort_values('n_cadences')
# Cast to str so that plotly treats the number of cadences as a discrete color category.
n_cad.n_cadences = n_cad.n_cadences.astype(str)
# `category_orders` must be keyed by the plotted column name ('corpus'); the
# previous key 'dataset' matched no column, so the chronological ordering of
# the x axis was silently ignored.
fig = px.bar(n_cad, x='corpus', y='counts', color='n_cadences', height=800, barmode='group',
             labels=dict(n_cadences="#cadences in a phrase"),
             category_orders=dict(corpus=chronological_order)
             )
fig.show()
Combinations of cadence types for phrases with more than one cadence#
# Frequency of each tuple of cadence labels among the phrases that have more than one distinct label.
value_count_df(phrases_with_cadences[phrases_with_cadences.n_cadences > 1].cadences)
| counts | % | |
|---|---|---|
| cadences | ||
| (EC, PAC) | 70 | 0.238908 |
| (DC, PAC) | 64 | 0.218430 |
| (HC, PAC) | 54 | 0.184300 |
| (IAC, PAC) | 27 | 0.092150 |
| (EC, HC) | 15 | 0.051195 |
| (PAC, HC) | 9 | 0.030717 |
| (DC, HC) | 6 | 0.020478 |
| (HC, DC, PAC) | 5 | 0.017065 |
| (IAC, HC) | 5 | 0.017065 |
| (DC, IAC) | 4 | 0.013652 |
| (HC, IAC) | 4 | 0.013652 |
| (EC, IAC) | 3 | 0.010239 |
| (PAC, IAC) | 3 | 0.010239 |
| (IAC, EC) | 3 | 0.010239 |
| (HC, EC) | 3 | 0.010239 |
| (DC, PC) | 2 | 0.006826 |
| (PAC, DC) | 2 | 0.006826 |
| (PC, PAC) | 2 | 0.006826 |
| (DC, EC, PAC) | 2 | 0.006826 |
| (HC, DC) | 1 | 0.003413 |
| (IAC, DC, PAC) | 1 | 0.003413 |
| (IAC, HC, PAC) | 1 | 0.003413 |
| (DC, HC, PAC) | 1 | 0.003413 |
| (EC, PC) | 1 | 0.003413 |
| (HC, IAC, PAC) | 1 | 0.003413 |
| (PAC, PC) | 1 | 0.003413 |
| (HC, PC) | 1 | 0.003413 |
| (EC, DC, PAC) | 1 | 0.003413 |
| (DC, EC, IAC) | 1 | 0.003413 |
Positioning of cadences within phrases#
# Build one scatter "row" per phrase that contains at least one cadence label,
# ordered by the `duration_qb` column. Every row gets an "end of phrase" marker
# at the phrase length plus one marker per cadence label, positioned relative
# to the phrase start.
df_rows = []
y_position = 0
for ix in phrases_with_cadences[phrases_with_cadences.n_cadences > 0].sort_values('duration_qb').index:
    df = phrase_segments.loc[ix]
    description = str(ix)
    if not df.cadence.notna().any():
        # Phrases without any cadence label produce no markers and do not
        # consume a y position.
        continue
    # The third index element is an interval; its length and left bound are used below.
    interval = ix[2]
    df_rows.append((y_position, interval.length, "end of phrase", description))
    start_pos = interval.left
    # Work on an explicit copy so that shifting the positions never writes into
    # (a view of) `df` and does not trigger pandas' SettingWithCopyWarning.
    cadences = df.loc[df.cadence.notna(), ['quarterbeats', 'cadence']].copy()
    cadences['quarterbeats'] = cadences['quarterbeats'] - start_pos
    for cadence_x, cadence_type in cadences.itertuples(index=False, name=None):
        df_rows.append((y_position, cadence_x, cadence_type, description))
    y_position += 1
data = pd.DataFrame(df_rows, columns=["phrase_ix", "x", "marker", "description"])
fig = px.scatter(data[data.x.notna()], x='x', y="phrase_ix", color="marker", hover_name="description", height=3000,
                 labels=dict(marker='legend'), color_discrete_map=CADENCE_COLORS)
fig.update_traces(marker_size=5)
# Reverse the y axis so that the first (lowest phrase_ix) row is drawn at the top.
fig.update_yaxes(autorange="reversed")
fig.show()
Cadence ultima#
# Start from a fresh copy of the phrase-sliced 'expanded' facet.
phrase_segments = segmented.get_facet("expanded")
# Rows that carry a cadence label but have no value in the `chord` column.
cadence_selector = phrase_segments.cadence.notna()
missing_chord_selector = phrase_segments.chord.isna()
cadence_with_missing_chord_selector = cadence_selector & missing_chord_selector
# NOTE(review): `missing` is not used again in this cell; the same selection is
# recomputed in the call below.
missing = phrase_segments[cadence_with_missing_chord_selector]
# Re-run ms3's label expansion on those rows only and write the result back in place.
# NOTE(review): the exact effect of propagate=False / skip_checks=True is defined by ms3 -- confirm in its docs.
expanded = ms3.expand_dcml.expand_labels(phrase_segments[cadence_with_missing_chord_selector], propagate=False, chord_tones=True, skip_checks=True)
phrase_segments.loc[cadence_with_missing_chord_selector] = expanded
# Report the cadence-labelled rows that still have no bass note after the expansion.
print(f"Ultima harmony missing for {(phrase_segments.cadence.notna() & phrase_segments.bass_note.isna()).sum()} cadence labels.")
Ultima harmony missing for 53 cadence labels.
Ultimae as Roman numeral#
def highlight(row, color="#ffffb3"):
    """Return per-cell CSS for one table row, highlighting frequent entries.

    Intended for row-wise use with a pandas Styler
    (``df.style.apply(highlight, axis=1)``) on a table with four columns.

    Args:
        row: Object exposing a numeric ``counts`` attribute (e.g. a DataFrame row).
        color: CSS color used as background for highlighted rows.

    Returns:
        A list of four entries: ``None`` for every cell when ``row.counts`` is
        below 10, otherwise the same ``background-color`` declaration for every cell.
    """
    if row.counts < 10:
        return [None, None, None, None]
    # f-string: the original plain string emitted the literal text "{color}"
    # instead of the requested color.
    return [f"background-color: {color};"] * 4
# Overall frequency of each cadence label; its index is used below to order the facet rows.
cadence_counts = all_labels.cadence.value_counts()
# Count the `numeral` values per (localkey_is_minor, cadence) combination and flatten to a table.
ultima_root = phrase_segments.groupby(['localkey_is_minor', 'cadence']).numeral.value_counts().rename('counts').to_frame().reset_index()
# Replace the boolean mode flag by readable facet labels.
ultima_root.localkey_is_minor = ultima_root.localkey_is_minor.map({False: 'in major', True: 'in minor'})
#ultima_root.style.apply(highlight, axis=1)
# One pie per cadence label (rows) and mode (columns).
fig = px.pie(ultima_root, names='numeral', values='counts',
facet_row='cadence', facet_col='localkey_is_minor',
height=1500,
category_orders={'cadence': cadence_counts.index},
)
# Facet titles: keep only the text after the last "=" of each annotation.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()
#phrase_segments.groupby(level=[0,1,2], group_keys=False).apply(lambda df: df if ((df.cadence == 'PAC') & (df.numeral == 'V')).any() else None)
Ultimae bass note as scale degree#
# Count the `bass_note` values per (localkey_is_minor, cadence) combination.
ultima_bass = phrase_segments.groupby(['localkey_is_minor','cadence']).bass_note.value_counts().rename('counts').reset_index()
# Convert the bass notes with ms3.fifths2sd, feeding it the `bass_note` column as
# `fifths` and the mode flag as `minor`. This must happen before the mode flag
# is replaced by text labels in the next statement.
ultima_bass.bass_note = ms3.transform(ultima_bass, ms3.fifths2sd, dict(fifths='bass_note', minor='localkey_is_minor'))
ultima_bass.localkey_is_minor = ultima_bass.localkey_is_minor.map({False: 'in major', True: 'in minor'})
#ultima_bass.style.apply(highlight, axis=1)
# One pie per cadence label (rows) and mode (columns); `cadence_counts` comes from an earlier cell.
fig = px.pie(ultima_bass, names='bass_note', values='counts',
facet_row='cadence', facet_col='localkey_is_minor',
height=1500,
category_orders={'cadence': cadence_counts.index},
)
# Facet titles: keep only the text after the last "=" of each annotation.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()
Chord progressions#
PACs with ultima I/i#
def remove_immediate_duplicates(l):
    """Collapse runs of equal neighbouring elements into a single element.

    Args:
        l: Any iterable (list, tuple, ...). The original implementation only
            accepted tuples and raised ``TypeError`` for lists.

    Returns:
        A tuple containing the elements of ``l`` in order, where every element
        that equals its direct predecessor has been dropped.
    """
    items = tuple(l)
    # Compare each element with its actual predecessor instead of a ``None``
    # sentinel, so that a leading ``None`` element is not lost.
    return tuple(
        current
        for position, current in enumerate(items)
        if position == 0 or current != items[position - 1]
    )
def get_progressions(selected='PAC', last_row=None, feature='chord', dataset=None, as_series=True, remove_duplicates=False):
    """Collect the ``feature`` sequences that lead up to each cadence of type ``selected``.

    Uses the nonlocal variable phrase_segments. Rows where ``feature`` is missing
    are ignored. The remaining rows are grouped by the first three index levels;
    within each group, a progression consists of all rows following the previous
    cadence label up to and including the row carrying the cadence label.

    Args:
        selected: Cadence label to collect progressions for.
        last_row: Optional mapping ``{column: value or tuple of values}``. A
            progression is kept only if, for every column, the value in its final
            row is among the given values.
        feature: Column of phrase_segments whose values make up the progressions.
        dataset: If given, groups whose first index level does not contain
            ``dataset`` (``in`` test) are skipped.
        as_series: Return a ``pd.Series`` of tuples (dtype object) instead of a list.
        remove_duplicates: Collapse immediate repetitions within each progression.

    Returns:
        A ``pd.Series`` or list of tuples, one per matching cadence.
    """
    if last_row is None:
        last_row = {}
    # Normalize every constraint to a tuple of admissible values.
    last_row = {k: v if isinstance(v, tuple) else (v,) for k, v in last_row.items()}
    progressions = []
    segments = phrase_segments[phrase_segments[feature].notna()]
    for (corp, *_), df in segments.groupby(level=[0, 1, 2]):
        if dataset is not None and dataset not in corp:
            continue
        if not (df.cadence == selected).fillna(False).any():
            continue
        # remove chords after the last cadence label
        df = df[df.cadence.bfill().notna()]
        # group segments leading up to a cadence label: the counter increases on the
        # row *after* each label, so a label closes its own group
        cadence_groups = df.cadence.notna().shift(fill_value=False).cumsum()
        for _, cadence in df.groupby(cadence_groups):
            last_r = cadence.iloc[-1]
            if last_r.cadence != selected:
                continue
            if any(last_r[feat] not in values for feat, values in last_row.items()):
                continue
            # Convert to a tuple first: remove_immediate_duplicates is applied to tuples
            # everywhere else in this notebook.
            progression = tuple(cadence[feature].to_list()) if remove_duplicates else tuple(cadence[feature])
            if remove_duplicates:
                progression = remove_immediate_duplicates(progression)
            progressions.append(progression)
    if as_series:
        return pd.Series(progressions, dtype='object')
    return progressions
# Chord-label progressions leading to every PAC whose final row has the numeral I or i.
chord_progressions = get_progressions('PAC', dict(numeral=('I', 'i')), 'chord')
print(f"Progressions for {len(chord_progressions)} cadences:")
# Tabulate the distinct progressions (value_count_df comes from the local utils module).
value_count_df(chord_progressions, "chord progressions")
Progressions for 4176 cadences:
| counts | % | |
|---|---|---|
| chord progressions | ||
| (I, V7, I) | 15 | 0.003592 |
| (I, V(64), V7, I) | 13 | 0.003113 |
| (V, V7, I, ii6(2), ii6, V7, I) | 12 | 0.002874 |
| (I, I6, IV, V(64), V, I) | 10 | 0.002395 |
| (I, IV, V(64), V, I) | 9 | 0.002155 |
| ... | ... | ... |
| (i/v, IV6(2), IV6, viio, i(4), i, i6, IV6(2)/VI, IV6/VI, viio/VI, VI(4), VI, i6, IV, V(4), V, i) | 1 | 0.000239 |
| (i, V, i/v, V(4)/v, V/v, i/v) | 1 | 0.000239 |
| (i/v, V/v, #viio, i(9), i, V(4), V, i) | 1 | 0.000239 |
| (i, V7, i, V7, I, I6, IV(9), IV, ii6, V7, vi, ii65, V, I) | 1 | 0.000239 |
| (I, I64, I6, IVM7, ii6, ii, iii7, I6, V(64), V, I) | 1 | 0.000239 |
3548 rows × 2 columns
# Same selection as for the chord progressions, but collecting the 'numeral' column instead.
numeral_progressions = get_progressions('PAC', dict(numeral=('I', 'i')), 'numeral')
value_count_df(numeral_progressions, "numeral progressions")
| counts | % | |
|---|---|---|
| numeral progressions | ||
| (I, V, V, I) | 28 | 0.006705 |
| (I, IV, V, V, I) | 22 | 0.005268 |
| (I, V, I) | 20 | 0.004789 |
| (I, ii, V, V, I) | 15 | 0.003592 |
| (I, V, I, V, I, V, I, V, I) | 14 | 0.003352 |
| ... | ... | ... |
| (i, iv, VII, III, VI, ii, i, V, i) | 1 | 0.000239 |
| (iv, VII, III, VI, ii, i, V, i) | 1 | 0.000239 |
| (i, VI, v, v, iv, iv, V, V, V, V, i) | 1 | 0.000239 |
| (i, V, i, V, #vii, III, i, iv, V, i, V, V, i) | 1 | 0.000239 |
| (i, V, III, i, V, V, i, v, iv, iv, V, i, ii, V, i) | 1 | 0.000239 |
3261 rows × 2 columns
# Collapse immediate repetitions of the same numeral within each progression before counting.
numeral_prog_no_dups = numeral_progressions.map(remove_immediate_duplicates)
value_count_df(numeral_prog_no_dups)
| counts | % | |
|---|---|---|
| (I, V, I) | 85 | 0.020354 |
| (I, IV, V, I) | 61 | 0.014607 |
| (I, ii, V, I) | 45 | 0.010776 |
| (I, V, I, V, I) | 36 | 0.008621 |
| (I, V, I, ii, V, I) | 33 | 0.007902 |
| ... | ... | ... |
| (V, iv, V, iv, i, #vii, i, V, i) | 1 | 0.000239 |
| (i, V, #vii, i, ii, iv, V, i, V, iv, V, i, iv, V, i) | 1 | 0.000239 |
| (vi, i, ii, #vii, i, iv, V, iv, V, i) | 1 | 0.000239 |
| (i, V, i, ii, v, iv, I, ii, V, I) | 1 | 0.000239 |
| (I, IV, V, IV, vii, V, I, IV, I, ii, V, I, IV, V, I) | 1 | 0.000239 |
2790 rows × 2 columns
PACs ending on scale degree 1#
Scale degrees expressed w.r.t. major scale, regardless of actual key.
# Bass-note progressions leading to every PAC whose final row has bass_note == 0.
bass_progressions = get_progressions('PAC', dict(bass_note=0), 'bass_note')
# Convert each progression tuple with ms3.fifths2sd; no mode is passed here.
bass_prog = bass_progressions.map(ms3.fifths2sd)
print(f"Progressions for {len(bass_progressions)} cadences:")
value_count_df(bass_prog, "bass progressions")
Progressions for 3825 cadences:
| counts | % | |
|---|---|---|
| bass progressions | ||
| (1, 4, 5, 5, 1) | 31 | 0.008105 |
| (1, 5, 1) | 28 | 0.007320 |
| (1, 5, 5, 1) | 20 | 0.005229 |
| (1, 3, 4, 5, 5, 1) | 17 | 0.004444 |
| (1, 2, 3, 4, 5, 5, 1) | 14 | 0.003660 |
| ... | ... | ... |
| (5, 7, 2, 5, 1, 4, 7, 3, 6, 2, 5, 1) | 1 | 0.000261 |
| (6, 7, 1, 4, 5, 1, 4, 4, 5, 3, 2, 5, 1, 4, 4, 5, 3, 2, 5, 1) | 1 | 0.000261 |
| (1, 7, 6, 6, 5, 5, 4, 3, 2, 1) | 1 | 0.000261 |
| (5, 5, 4, 4, 3, 3, 2, 2, 1) | 1 | 0.000261 |
| (1, 2, 1, 2, 2, 4, 6, 6, 5, 5, #4, #4, #4, #4, #4, 4, 4, 3, 5, 5, 5, 1) | 1 | 0.000261 |
2970 rows × 2 columns
# Collapse immediate repetitions of the same bass degree within each progression before counting.
bass_prog_no_dups = bass_prog.map(remove_immediate_duplicates)
value_count_df(bass_prog_no_dups)
| counts | % | |
|---|---|---|
| (1, 5, 1) | 76 | 0.019869 |
| (1, 4, 5, 1) | 57 | 0.014902 |
| (1, 5, 1, 5, 1) | 40 | 0.010458 |
| (1, 2, 3, 4, 5, 1) | 29 | 0.007582 |
| (1, 3, 4, 5, 1) | 27 | 0.007059 |
| ... | ... | ... |
| (1, 5, 2, 1, 5, 4, b3, 2, b3, 4, 5, 1) | 1 | 0.000261 |
| (1, 7, b7, 6, b6, 5, 1) | 1 | 0.000261 |
| (1, 5, 2, 7, 1, 5, 1) | 1 | 0.000261 |
| (b3, 4, 5, 1, 2, b3, 4, 5, 1) | 1 | 0.000261 |
| (#5, 6, #5, 6, #5, 6, 7, 1, 7, 1, 7, 1, b3, 4, 5, 1, 5, 1) | 1 | 0.000261 |
2671 rows × 2 columns
def make_sankey(data, labels, node_pos=None, margin={'l': 10, 'r': 10, 'b': 10, 't': 10}, pad=20, color='auto', **kwargs):
    """Build a plotly Sankey figure from an edge table and a list of node labels.

    Args:
        data: Object with ``source``, ``target`` and ``value`` attributes (e.g. a
            DataFrame with these columns) describing the links between node indices.
        labels: Node labels; position ``i`` labels node ``i``.
        node_pos: Optional mapping ``{node index: (x, y)}``. Nodes missing from the
            mapping are placed at 0 on both axes. If None, no positions are passed.
        margin: Figure margins passed to ``update_layout``. The default dict is
            only read, never modified.
        pad: Padding between nodes.
        color: Node colours passed to plotly, or ``'auto'`` to give every distinct
            label its own hue (equal labels share a colour).
        **kwargs: Further keyword arguments for ``fig.update_layout``.

    Returns:
        The ``go.Figure`` containing the Sankey trace.
    """
    # The isinstance check avoids an ambiguous element-wise comparison for array-like colours.
    if isinstance(color, str) and color == 'auto':
        # dict.fromkeys keeps first-appearance order, so the label -> colour mapping is
        # the same on every run (iterating a set of strings is not).
        unique_labels = list(dict.fromkeys(labels))
        color_step = 100 / len(unique_labels) if unique_labels else 0
        unique_colors = {label: f'hsv({round(i*color_step)}%,100%,100%)' for i, label in enumerate(unique_labels)}
        color = [unique_colors[label] for label in labels]
    if node_pos is None:
        node_x = node_y = None
    else:
        n_nodes = len(labels)
        node_x = [node_pos[i][0] if i in node_pos else 0 for i in range(n_nodes)]
        node_y = [node_pos[i][1] if i in node_pos else 0 for i in range(n_nodes)]
    fig = go.Figure(go.Sankey(
        arrangement='snap',
        node=dict(
            pad=pad,
            #thickness = 20,
            #line = dict(color = "black", width = 0.5),
            label=labels,
            x=node_x,
            y=node_y,
            color=color,
        ),
        link=dict(
            source=data.source,
            target=data.target,
            value=data.value,
        ),
    ))
    fig.update_layout(margin=margin, **kwargs)
    return fig
def progressions2graph_data(progressions, cut_at_stage=None):
    """Turn progressions into staged graph nodes and weighted edges, reading each one backwards.

    Stage 0 is the last element of a progression, stage 1 the one before it, etc.
    Every distinct (stage, label) pair gets its own integer node id, assigned in
    order of first appearance.

    Args:
        progressions: Iterable of reversible sequences of hashable labels.
        cut_at_stage: If truthy, elements at stages greater than this value are
            ignored. ``None`` or ``0`` means no cut.

    Returns:
        ``(stage_nodes, edge_weights)`` where ``stage_nodes`` maps
        ``stage -> {label: node id}`` and ``edge_weights`` is a Counter of
        ``(earlier node, later node)`` pairs, i.e. edges point towards stage 0.
    """
    nodes_by_stage = defaultdict(dict)
    transitions = Counter()
    next_id = 0
    for progression in progressions:
        successor = None  # node of the element that follows in the original order
        for stage, label in enumerate(reversed(progression)):
            if cut_at_stage and stage > cut_at_stage:
                break
            known = nodes_by_stage[stage]
            node = known.get(label)
            if node is None:
                node = next_id
                known[label] = node
                next_id += 1
            if successor is not None:
                transitions[(node, successor)] += 1
            successor = node
    return nodes_by_stage, transitions
def graph_data2sankey(stage_nodes, edge_weights):
    """Create a Sankey figure from the output of progressions2graph_data.

    Args:
        stage_nodes: Mapping ``stage -> {label: node id}``; node ids are expected
            to be the consecutive integers ``0 .. n-1``.
        edge_weights: Mapping ``(source node, target node) -> weight``.

    Returns:
        The figure returned by make_sankey.
    """
    rows = [(src, tgt, weight) for (src, tgt), weight in edge_weights.items()]
    links = pd.DataFrame(rows, columns=['source', 'target', 'value'])
    # Invert the per-stage dictionaries into a single node id -> label lookup.
    label_by_node = {}
    for nodes in stage_nodes.values():
        for label, node in nodes.items():
            label_by_node[node] = label
    ordered_labels = [label_by_node[idx] for idx in range(len(label_by_node))]
    return make_sankey(links, ordered_labels)
def plot_progressions(progressions, cut_at_stage=None):
    """Plot progressions as a Sankey diagram.

    Chains progressions2graph_data and graph_data2sankey; ``cut_at_stage`` is
    forwarded unchanged to progressions2graph_data.
    """
    graph_data = progressions2graph_data(progressions, cut_at_stage=cut_at_stage)
    return graph_data2sankey(*graph_data)
Chordal roots for the 3 last stages#
# Sankey of the de-duplicated numeral progressions; elements at stages > 3 (counted from the end) are ignored.
plot_progressions(numeral_prog_no_dups, cut_at_stage=3)
Complete chords for the last four stages in major#
# Chord progressions for PACs whose final row has numeral 'I' and localkey_is_minor == False.
pac_major = get_progressions('PAC', dict(numeral='I', localkey_is_minor=False), 'chord')
# Elements at stages > 4 (counted from the end) are ignored.
plot_progressions(pac_major, cut_at_stage=4)
Bass degrees for the last 7 stages.#
# Sankey of the de-duplicated bass progressions; elements at stages > 7 (counted from the end) are ignored.
plot_progressions(bass_prog_no_dups, cut_at_stage=7)
Bass degrees without accidentals#
def remove_sd_accidentals(t):
    """Reduce every scale degree in ``t`` to its last character.

    Args:
        t: Iterable of non-empty, indexable scale-degree values such as ``'#4'``.

    Returns:
        A tuple with the last character of each element, e.g. ``('4', '3')``
        for ``('#4', 'b3')``.
    """
    return tuple(degree[-1] for degree in t)
# Strip accidentals first, then collapse the immediate repetitions that this may create.
bass_prog_no_acc_no_dup = bass_prog.map(remove_sd_accidentals).map(remove_immediate_duplicates)
# Elements at stages > 7 (counted from the end) are ignored.
plot_progressions(bass_prog_no_acc_no_dup, cut_at_stage=7)
HCs ending on V#
# Bass-note progressions for HCs whose final row has numeral 'V', converted with ms3.fifths2sd.
half = get_progressions('HC', dict(numeral='V'), 'bass_note').map(ms3.fifths2sd)
print(f"Progressions for {len(half)} cadences:")
# De-duplicate before plotting; elements at stages > 5 (counted from the end) are ignored.
plot_progressions(half.map(remove_immediate_duplicates), cut_at_stage=5)
Progressions for 2222 cadences: